import re

import jieba

with open('data/fff.txt', 'r', encoding='utf-8') as f:
    s = f.read()

# Remove punctuation (Chinese comma and full stop) and spaces
s = re.sub(r'[，。 ]', '', s)

# Segment the text into a list of words with jieba
items = jieba.lcut(s)
# Deduplicate by converting to a set (note: tokens is not used further below)
tokens = set(items)

# Count how many times each word appears and store the counts in a dict
times = {}
for v in items:
    times[v] = times.get(v, 0) + 1

# A dict cannot be sorted directly,
# so convert it to a list of [word, count] pairs: [[word, count], [word, count], ...]
new_items = []
for k in times:
    new_items.append([k, times[k]])

# Sort by count in descending order, using a lambda as the sort key
new_items.sort(key=lambda x: x[1], reverse=True)
print(new_items)
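
# Equivalent approach (a sketch, not part of the original logic): collections.Counter
# builds the same word-frequency table, and most_common() returns (word, count)
# tuples already sorted from most to least frequent.
from collections import Counter

counts = Counter(items)
print(counts.most_common())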
